/* GLSL-style type spellings mapped onto the OpenCL vector types. */
#define vec2 float2
#define vec3 float3
#define vec4 float4
/*
 * GLSL-style colour swizzles mapped onto OpenCL component selectors.
 * NOTE: these are plain token replacements, so ANY identifier spelled
 * `rgb` or `rgba` in this file (including local variable names) is
 * rewritten to `xyz` / `xyzw` by the preprocessor.
 */
#define rgb xyz
#define rgba xyzw
/* Sigma of the spatial (pixel-offset) Gaussian used by the kernel below. */
#define SIGMA 10.0f
/* Sigma of the range (colour-difference) Gaussian used by the kernel below. */
#define BSIGMA 0.1f

/*
 * Shared image sampler: coordinates are normalized, out-of-range
 * coordinates are clamped to the edge, and reads are linearly filtered.
 */
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;

/*
 * Gaussian probability density with zero mean, evaluated at x.
 *
 * x     - point at which the density is evaluated.
 * sigma - standard deviation; must be non-zero (it is used as a divisor).
 *
 * 0.39894 approximates 1 / sqrt(2 * pi).
 */
float normpdf(float x, float sigma) {
    const float exponent = -0.5f * x * x / (sigma * sigma);
    return 0.39894f * exp(exponent) / sigma;
}

/*
 * Gaussian weight of a 3-component vector, based on its squared length.
 *
 * v     - vector whose squared length dot(v, v) feeds the exponent.
 * sigma - standard deviation; must be non-zero (it is used as a divisor).
 *
 * Uses the same 1 / (sigma * sqrt(2 * pi)) prefactor as the scalar normpdf.
 */
float normpdf3(vec3 v, float sigma) {
    const float sq_len = dot(v, v);
    const float exponent = -0.5f * sq_len / (sigma * sigma);
    return 0.39894f * exp(exponent) / sigma;
}

/*
 * Linearly rescales value so that min maps to 0 and max maps to 1.
 *
 * value - the channel value to rescale.
 * min   - value that maps to 0.
 * max   - value that maps to 1; must differ from min (the span is a divisor).
 *
 * The result is not clamped: inputs outside [min, max] map outside [0, 1].
 */
float normalizeColorChannel(float value, float min, float max) {
    const float offset = value - min;
    const float span = max - min;
    return offset / span;
}

/*
 * Squared Euclidean distance between two colours.
 *
 * baseColor - first colour.
 * dstColor  - second colour.
 *
 * Note that no square root is taken: the returned value is the SUM OF
 * SQUARED per-channel differences, so thresholds compared against it must
 * be expressed in squared units as well.
 */
float ColorDistance(float3 baseColor,float3 dstColor){
    const float3 diff = baseColor - dstColor;
    return pow(diff.x, 2.0f) + pow(diff.y, 2.0f) + pow(diff.z, 2.0f);
}

/*
 * Skin-smoothing kernel: bilateral-filters the input image and keeps the
 * filtered result only for pixels classified as skin, then blends it with
 * the unfiltered pixel.
 *
 * One work-item produces one output pixel; the global work size is used as
 * the output resolution.
 *
 * input     - source image, sampled with normalized coordinates.
 * dest_data - destination image, written at (get_global_id(0), get_global_id(1)).
 * param     - filter parameters; origROI[0..1] is used as the offset and
 *             origROI[2..3] as the scale applied to the normalized coordinate.
 * alpha     - filter strength in percent: 100 outputs the filtered colour,
 *             0 outputs the original. Values outside 0..100 are clamped.
 * radius    - window diameter; the half-width is (radius - 1) / 2. When the
 *             half-width is not positive the pixel is passed through as is.
 * AutoSkin  - when > 0, skin is detected from fixed Cb/Cr thresholds on the
 *             filtered colour; otherwise by distance to SkinColor.
 * SkinColor - reference colour packed as 0xRRGGBB (manual mode only).
 * Tolerance - manual-mode threshold; Tolerance / 20 is compared against the
 *             squared colour distance returned by ColorDistance.
 */
__kernel void MAIN(
      __read_only image2d_t input,
      __write_only image2d_t dest_data,
      __global FilterParam* param,
      int alpha,
      int radius,
      int AutoSkin,
      int SkinColor,
      int Tolerance)
{
    const int W = get_global_size(0);
    const int H = get_global_size(1);
    const float2 resolution = (float2)(W,H);
    const int2 gl_FragCoord = (int2)(get_global_id(0), get_global_id(1));

    /* Pixel centre in normalized coordinates, then mapped into the ROI. */
    const vec2 fragCoord = (vec2)(get_global_id0( param), get_global_id1( param));
    vec2 tc = (fragCoord + (vec2)(0.5f)) / resolution;
    tc = tc * (vec2)(param->origROI[2], param->origROI[3]) + (vec2)(param->origROI[0], param->origROI[1]);
    const float4 orig = read_imagef(input, sampler, tc);

    /*
     * Degenerate window: pass the pixel through and stop. Returning here
     * avoids writing the pixel twice, and for a negative half-width avoids
     * the 0/0 (NaN) that an empty accumulation loop would produce below.
     */
    const int kSize = (radius - 1) / 2;
    if (kSize <= 0) {
        write_imagef(dest_data, gl_FragCoord, orig);
        return;
    }

    /* Bilateral filter: spatial Gaussian (SIGMA) times range Gaussian (BSIGMA). */
    vec3 final_colour = (vec3)(0.0f);
    float Z = 0.0f;
    /* Scales the range weight so that an identical colour weighs 1. */
    const float bZ = 1.0f / normpdf(0.0f, BSIGMA);
    for (int i = -kSize; i <= kSize; ++i) {
        /* The weight for offset i does not depend on j: compute it once. */
        const float weightI = normpdf(fabs((float)(i)), SIGMA);
        for (int j = -kSize; j <= kSize; ++j) {
            const vec2 tmp_uv = (vec2)((float)(i), (float)(j)) / resolution;
            const vec4 cc = read_imagef(input, sampler, tc + tmp_uv);
            const float weightJ = normpdf(fabs((float)(j)), SIGMA);
            const float factor = normpdf3(cc.xyz - orig.xyz, BSIGMA) * bZ * weightI * weightJ;
            Z += factor;
            final_colour += factor * cc.xyz;
        }
    }

    /* Z > 0 here: the centre sample (i == j == 0) always contributes. */
    vec4 fragColor = (vec4)(final_colour / Z, orig.w);

    bool isSkin = false;
    if (AutoSkin > 0) {
        /* Fixed chroma window, tested on the filtered colour in 0..255 scale. */
        const float skin_cb_min = 100.0f;
        const float skin_cb_max = 118.0f;
        const float skin_cr_min = 121.0f;
        const float skin_cr_max = 161.0f;
        const vec4 scaled = fragColor * 255.0f;
        const float cb = 128.0f - scaled.x*0.148f - scaled.y*0.291f + scaled.z*0.439f;
        const float cr = 128.0f + scaled.x*0.439f - scaled.y*0.368f - scaled.z*0.071f;
        isSkin = cb > skin_cb_min && cb < skin_cb_max &&
                 cr > skin_cr_min && cr < skin_cr_max;
    } else {
        /* Manual mode: compare the ORIGINAL colour with the reference colour. */
        const int SkinColor_r = (int) (((SkinColor)>>16) & 0xff);
        const int SkinColor_g = (int) (((SkinColor)>>8) & 0xff);
        const int SkinColor_b = (int) ((SkinColor) & 0xff);
        const float tolerance = (float)(Tolerance)/20.0f;
        const vec3 reference = (vec3)(SkinColor_r,SkinColor_g,SkinColor_b)/255.0f;
        isSkin = ColorDistance(reference, orig.xyz) < tolerance;
    }

    /* Non-skin pixels keep their original colour. */
    if (!isSkin) {
        fragColor = orig;
    }

    /*
     * Blend weight of the original pixel. mix() is only defined for weights
     * in [0, 1], so an out-of-range alpha is clamped instead of being passed
     * through.
     */
    const float keepOrig = clamp(1.0f - (float)alpha/100.0f, 0.0f, 1.0f);
    const vec4 outColor = mix(fragColor, orig, (float4)(keepOrig));
    write_imagef(dest_data, gl_FragCoord, outColor);
}



